# -*- coding: utf-8 -*-
"""
Statement pre-processors.
语句预处理器。
"""


def clean_whitespace(chatbot, statement):
    """
    Normalize the whitespace in the statement text.

    Newlines, carriage returns and tabs are turned into spaces, leading
    and trailing whitespace is stripped, and every run of consecutive
    spaces is collapsed into a single space.

    :param chatbot: Not used by this pre-processor.
    :param statement: Object exposing a string ``text`` attribute; it is
        modified in place.
    :returns: The same ``statement`` object, with its ``text`` updated.
    """
    import re

    cleaned = statement.text

    # Line breaks and tabs become plain spaces so they can be collapsed below.
    for control_char in ('\n', '\r', '\t'):
        cleaned = cleaned.replace(control_char, ' ')

    # Drop whitespace at both ends before squeezing the interior runs.
    cleaned = cleaned.strip()

    # Squeeze each run of spaces down to one.
    statement.text = re.sub(' +', ' ', cleaned)

    return statement


def unescape_html(chatbot, statement):
    """
    Replace escaped HTML entities in the statement text with the
    characters they stand for.
    For example: "&lt;b&gt;" becomes "<b>".

    :param chatbot: Not used by this pre-processor.
    :param statement: Object exposing a string ``text`` attribute; it is
        modified in place.
    :returns: The same ``statement`` object, with its ``text`` updated.
    """
    import sys

    # Pick whichever object provides ``unescape`` on the running interpreter:
    # the ``html`` module on Python 3, an ``HTMLParser`` instance on Python 2.
    if sys.version_info[0] >= 3:
        import html as unescaper
    else:
        from HTMLParser import HTMLParser
        unescaper = HTMLParser()

    statement.text = unescaper.unescape(statement.text)

    return statement


def convert_to_ascii(chatbot, statement):
    """
    Reduce the statement text to its closest ASCII representation.
    For example: "på fédéral" becomes "pa federal".

    The text is decomposed with NFKD normalization and then encoded to
    ASCII with errors ignored, so combining marks and any character with
    no ASCII decomposition are dropped from the result.

    :param chatbot: Not used by this pre-processor.
    :param statement: Object exposing a string ``text`` attribute; it is
        modified in place.
    :returns: The same ``statement`` object, with its ``text`` updated.
    """
    import sys
    import unicodedata

    # On Python 2 the text is coerced to ``unicode`` first, before it is
    # handed to the normalizer.
    if sys.version_info[0] < 3:
        statement.text = unicode(statement.text)  # NOQA

    # Split accented characters into base letter + combining mark(s).
    decomposed = unicodedata.normalize('NFKD', statement.text)

    # Encoding with 'ignore' discards everything that is not ASCII.
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    ascii_text = ascii_bytes.decode('utf-8')

    statement.text = str(ascii_text)
    return statement
